x86: IRQ Migration logic enhancement.
authorKeir Fraser <keir.fraser@citrix.com>
Mon, 26 Oct 2009 13:33:38 +0000 (13:33 +0000)
committerKeir Fraser <keir.fraser@citrix.com>
Mon, 26 Oct 2009 13:33:38 +0000 (13:33 +0000)
To program an MSI's address/vector pair safely, delay the IRQ
migration operation until just before acking the next interrupt.
This avoids inconsistent interrupt generation caused by the
non-atomic writes to the MSI address and data registers.

Port the logic from Linux and tailor it for Xen.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
xen/arch/x86/hpet.c
xen/arch/x86/hvm/hvm.c
xen/arch/x86/io_apic.c
xen/arch/x86/irq.c
xen/arch/x86/msi.c
xen/include/asm-x86/io_apic.h
xen/include/asm-x86/irq.h
xen/include/xen/irq.h

index 1090f2dd5954766f67a9aee73550ca9b25b97a82..f2ba93a883050646784986249e98a00804b8006b 100644 (file)
@@ -289,6 +289,7 @@ static void hpet_msi_ack(unsigned int irq)
     struct irq_desc *desc = irq_to_desc(irq);
 
     irq_complete_move(&desc);
+    move_native_irq(irq);
     ack_APIC_irq();
 }
 
index 8a14a8e13e6432408207d20130e1ae9e0f449b4d..60fd0a563560f6720530db7af48d82ae0a594bd7 100644 (file)
@@ -243,7 +243,7 @@ void hvm_migrate_pirqs(struct vcpu *v)
             continue;
         irq = desc - irq_desc;
         ASSERT(MSI_IRQ(irq));
-        desc->handler->set_affinity(irq, *cpumask_of(v->processor));
+        irq_set_affinity(irq, *cpumask_of(v->processor));
         spin_unlock_irq(&desc->lock);
     }
     spin_unlock(&d->event_lock);
index 265e40fc3332ee79694dd9a77b8c0386a3019b8a..4337484fdb00c366a3c1e8c2871f5bf2314c31d0 100644 (file)
@@ -1379,6 +1379,7 @@ static void ack_edge_ioapic_irq(unsigned int irq)
     struct irq_desc *desc = irq_to_desc(irq);
     
     irq_complete_move(&desc);
+    move_native_irq(irq);
 
     if ((desc->status & (IRQ_PENDING | IRQ_DISABLED))
         == (IRQ_PENDING | IRQ_DISABLED))
@@ -1419,6 +1420,38 @@ static void setup_ioapic_ack(char *s)
 }
 custom_param("ioapic_ack", setup_ioapic_ack);
 
+/*
+ * Check whether any IO-APIC pin routed to this irq still has its
+ * Remote IRR bit set, i.e. a level-triggered interrupt is in flight
+ * and not yet acknowledged.  Callers defer irq migration while this
+ * returns true.
+ */
+static bool_t io_apic_level_ack_pending(unsigned int irq)
+{
+    struct irq_pin_list *entry;
+    unsigned long flags;
+
+    spin_lock_irqsave(&ioapic_lock, flags);
+    for (entry = &irq_2_pin[irq]; entry;
+         entry = entry->next ? irq_2_pin + entry->next : NULL) {
+        unsigned int reg;
+
+        /*
+         * A pin of -1 marks an unused slot in the chain: skip it.
+         * (Note: a plain "continue" in a for(;;) walk here would spin
+         * forever with the lock held and IRQs off.)
+         */
+        if (entry->pin == -1)
+            continue;
+        reg = io_apic_read(entry->apic, 0x10 + entry->pin * 2);
+        /* Is the remote IRR bit set? */
+        if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+            spin_unlock_irqrestore(&ioapic_lock, flags);
+            return 1;
+        }
+    }
+    spin_unlock_irqrestore(&ioapic_lock, flags);
+
+    return 0;
+}
+
 static void mask_and_ack_level_ioapic_irq (unsigned int irq)
 {
     unsigned long v;
@@ -1456,6 +1489,10 @@ static void mask_and_ack_level_ioapic_irq (unsigned int irq)
     v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
 
     ack_APIC_irq();
+    
+    if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
+       !io_apic_level_ack_pending(irq))
+        move_native_irq(irq);
 
     if (!(v & (1 << (i & 0x1f)))) {
         atomic_inc(&irq_mis_count);
@@ -1503,6 +1540,10 @@ static void end_level_ioapic_irq (unsigned int irq)
 
     ack_APIC_irq();
 
+    if ((irq_desc[irq].status & IRQ_MOVE_PENDING) &&
+            !io_apic_level_ack_pending(irq))
+        move_native_irq(irq);
+
     if (!(v & (1 << (i & 0x1f)))) {
         atomic_inc(&irq_mis_count);
         spin_lock(&ioapic_lock);
@@ -1564,6 +1605,7 @@ static void ack_msi_irq(unsigned int irq)
     struct irq_desc *desc = irq_to_desc(irq);
 
     irq_complete_move(&desc);
+    move_native_irq(irq);
 
     if ( msi_maskable_irq(desc->msi_desc) )
         ack_APIC_irq(); /* ACKTYPE_NONE */
index 74d096f462c66e4e50873164d4a65a4ff0ef69ee..6e566ab9d90b0b9f589121c887a8452b7d965731 100644 (file)
@@ -455,6 +455,67 @@ void __setup_vector_irq(int cpu)
     }
 }
 
+/*
+ * Reprogram the affinity of an irq that the caller has already
+ * masked.  Consumes the request queued by irq_set_affinity():
+ * clears IRQ_MOVE_PENDING and applies pending_mask via the
+ * handler's set_affinity hook.
+ */
+void move_masked_irq(int irq)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+        return;
+
+    desc->status &= ~IRQ_MOVE_PENDING;
+
+    if (unlikely(cpus_empty(desc->pending_mask)))
+        return;
+
+    if (!desc->handler->set_affinity)
+        return;
+
+    /*
+     * If there was a valid mask to work with, do the disable,
+     * re-program, enable sequence.  This is *not* particularly
+     * important for level-triggered interrupts, but in the
+     * edge-triggered case we might be setting the RTE while an
+     * active trigger is coming in, which could make some IO-APICs
+     * malfunction.  Being paranoid, I guess!
+     *
+     * For correct operation this depends on the caller masking
+     * the irq.
+     */
+    if (likely(cpus_intersects(desc->pending_mask, cpu_online_map)))
+        desc->handler->set_affinity(irq, desc->pending_mask);
+
+    cpus_clear(desc->pending_mask);
+}
+
+/*
+ * Perform a pending irq migration, masking the line around the
+ * reprogramming.  Intended to be called from the ack paths (see the
+ * move_native_irq() calls added to the ack handlers in this change),
+ * so the destination is rewritten between interrupts rather than
+ * while one may be in flight.
+ */
+void move_native_irq(int irq)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+
+    /* Nothing to do unless irq_set_affinity() queued a migration. */
+    if (likely(!(desc->status & IRQ_MOVE_PENDING)))
+        return;
+
+    /* Don't touch a disabled irq: enable() below would unmask it. */
+    if (unlikely(desc->status & IRQ_DISABLED))
+        return;
+
+    /* Mask, reprogram the destination, then unmask. */
+    desc->handler->disable(irq);
+    move_masked_irq(irq);
+    desc->handler->enable(irq);
+}
+
+/*
+ * For re-setting irq interrupt affinity for specific irq.
+ * Only records the request: sets IRQ_MOVE_PENDING and stores the
+ * target mask in pending_mask.  The actual reprogramming is deferred
+ * to move_native_irq()/move_masked_irq() on a later ack.
+ * Caller must hold desc->lock.
+ */
+void irq_set_affinity(int irq, cpumask_t mask)
+{
+    struct irq_desc *desc = irq_to_desc(irq);
+    
+    /* Nothing to queue if the handler cannot retarget this irq. */
+    if (!desc->handler->set_affinity)
+        return;
+    
+    ASSERT(spin_is_locked(&desc->lock));
+    desc->status |= IRQ_MOVE_PENDING;
+    cpus_copy(desc->pending_mask, mask);
+}
+
 asmlinkage void do_IRQ(struct cpu_user_regs *regs)
 {
     struct irqaction *action;
index 6bf4e6ef6980eee1c2ab0e996426dc87b5185e38..261da58185635f9e1e0d01670e91963e67e61af2 100644 (file)
@@ -231,7 +231,6 @@ static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
         u8 slot = PCI_SLOT(dev->devfn);
         u8 func = PCI_FUNC(dev->devfn);
 
-        mask_msi_irq(entry->irq);
         pci_conf_write32(bus, slot, func, msi_lower_address_reg(pos),
                          msg->address_lo);
         if ( entry->msi_attrib.is_64 )
@@ -244,7 +243,6 @@ static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
         else
             pci_conf_write16(bus, slot, func, msi_data_reg(pos, 0),
                              msg->data);
-        unmask_msi_irq(entry->irq);
         break;
     }
     case PCI_CAP_ID_MSIX:
@@ -252,13 +250,11 @@ static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
         void __iomem *base;
         base = entry->mask_base;
 
-        mask_msi_irq(entry->irq);
         writel(msg->address_lo,
                base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
         writel(msg->address_hi,
                base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
         writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
-        unmask_msi_irq(entry->irq);
         break;
     }
     default:
index e064d4a7d5d3423b6cc0d460cbcb41a57a5d0071..6781ac1bac0141ac4867ddccfac55dc37ea53989 100644 (file)
 
 #define IO_APIC_ID(idx) (mp_ioapics[idx].mpc_apicid)
 
+/* I/O Unit Redirection Table */
+#define IO_APIC_REDIR_VECTOR_MASK   0x000FF
+#define IO_APIC_REDIR_DEST_LOGICAL  0x00800
+#define IO_APIC_REDIR_DEST_PHYSICAL 0x00000
+#define IO_APIC_REDIR_SEND_PENDING  (1 << 12)
+#define IO_APIC_REDIR_REMOTE_IRR    (1 << 14)
+#define IO_APIC_REDIR_LEVEL_TRIGGER (1 << 15)
+#define IO_APIC_REDIR_MASKED        (1 << 16)
+
 /*
  * The structure of the IO-APIC:
  */
index c4fbaeb58d7e32bf464c68bac0482c65bb09531d..90f5fd26f37fdd84a87050062468960f5bdd3b77 100644 (file)
@@ -138,6 +138,12 @@ int __assign_irq_vector(int irq, struct irq_cfg *cfg, cpumask_t mask);
 
 int bind_irq_vector(int irq, int vector, cpumask_t domain);
 
+void move_native_irq(int irq);
+
+void move_masked_irq(int irq);
+
+void irq_set_affinity(int irq, cpumask_t mask);
+
 #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
 #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
 
index bf81b6b8db82acd9f410a29b9e3b8f78b452489e..23565ae2f38091f8f33107f570264f4ad22a7e9e 100644 (file)
@@ -24,6 +24,7 @@ struct irqaction {
 #define IRQ_REPLAY     8       /* IRQ has been replayed but not acked yet */
 #define IRQ_GUEST       16      /* IRQ is handled by guest OS(es) */
 #define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
+#define IRQ_MOVE_PENDING      64  /* IRQ is migrating to another CPU */
 #define IRQ_PER_CPU     256     /* IRQ is per CPU */
 
 /* Special IRQ numbers. */
@@ -75,6 +76,7 @@ typedef struct irq_desc {
     int irq;
     spinlock_t lock;
     cpumask_t affinity;
+    cpumask_t pending_mask;  /* IRQ migration pending mask */
 
     /* irq ratelimit */
     s_time_t rl_quantum_start;